Load and clean data

  • Load data including all specifications from previous step.
  • Indicate if item was flagged = 1 or was not flagged = 0
all_specifications <- read.csv("data/tidy/specification_results.csv") %>% 
  dplyr::mutate(row = row_number(),
                hf_2_parameter = hf_2_parameter_vector )   %>% 
  
  # Indicate if item was flagged = 1 or was not flagged = 0
  separate_rows(Flagged_items, sep = "-") %>%
  mutate(value = 1) %>%
  spread(Flagged_items, value, fill = 0) %>% # Creates automatically V1 indicating where no Items were flagged
  # Combine criterion and threshold into variable hf_3_criterion
  mutate(hf_3_criterion = paste0(hf_3_criterion, ": ", hf_4_threshold)) %>% 
  dplyr::select(-c(X)) %>% 
  
  # create overall method variable
  mutate(method = case_when(
    str_detect(hf_3_criterion, "Cox") ~ "R2 Cox",
    str_detect(hf_3_criterion, "McFadden") ~ "R2 McFadden",
    str_detect(hf_3_criterion, "Nagelkerke") ~ "R2 Nagelkerke",
    str_detect(hf_3_criterion, "beta") ~ "beta",
    str_detect(hf_3_criterion, "lr") ~ "lr",
    TRUE ~ "other" #
  )) 

Choose Specifications

Create data sets that have either all specifications, no LR analyses, or only LR analyses.

nolr_specifications <- all_specifications %>% 
  filter(method != "lr") %>% 
  dplyr::select(-row) %>%
  distinct()

specifications_only_lr <- all_specifications %>% 
  filter(method == "lr")

specifications <- all_specifications

Create Vector with Ceiling items

pf_items <- c("PFM1", "PFM2", "PFM3", "PFM4", "PFM6", "PFM7", "PFM9", "PFM10", 
              "PFM12", "PFM15", "PFM16", "PFM17", "PFM18", "PFM19", "PFM21", 
              "PFM23", "PFM25", "PFM26", "PFM27", "PFM28", "PFM29", "PFM32", 
              "PFM33", "PFM34", "PFM35", "PFM36", "PFM37", "PFM38", "PFM40", 
              "PFM43", "PFM44", "PFM46", "PFM49", "PFM51", "PFM53")


Figure: Item Specific Despriptive Specification Plot

Preparation


Which factors

wf_1_comparison_vec <- unique(specifications$wf_1_comparison)

How Factors

hf_1_correction_vec <- unique(specifications$hf_1_correction)
hf_2_parameter_vec <- unique(specifications$hf_2_parameter)
vec <- unique(nolr_specifications$hf_3_criterion)
hf_3_criterion_vec <- vec[order(
  sub(":.*", "", vec) != "beta",
  sub(":.*", "", vec) != "CoxSnell",
  sub(":.*", "", vec) != "McFadden",
  sub(":.*", "", vec) != "Nagelkerke",
  sub(":.*", "", vec),
  as.numeric(sub(".*:", "", vec))
)]

number_which_how_factors <- 4

ylabels <- rev(c(
  "Comparison: USA-GER-ARG",  
  "Comparison: USA-GER",
  "Comparison: USA-ARG", 
  "Comparison: GER-ARG",
  
  "Correction: For Age",  
  "Correction: None",  
  
  "Parameters: Estimated", 
  "Parameters: PROMIS",
  
  "Criterion: Beta: 0.05",
  "Criterion: Beta: 0.10",
  
  "Criterion: Cox-Snell: 0.02",
  "Criterion: Cox-Snell: 0.03",
  "Criterion: Cox-Snell: 0.05",
  "Criterion: McFadden: 0.02",
  "Criterion: McFadden: 0.03",
  "Criterion: McFadden: 0.05",
  "Criterion: Nagelkerke: 0.02",
  "Criterion: Nagelkerke: 0.03",
  "Criterion: Nagelkerke: 0.05"#,
# "Comparison: USA-GER-ARG (1.5%)",  
# "Comparison: USA-GER (0.1%)",
# "Comparison: USA-ARG (1.1%)", 
# "Comparison: GER-ARG (2.2%)",
# 
# "Correction: For Age (2.0%)",  
# "Correction: None (2.8%)",  
# 
# "Parameters: Estimated (2.4%)", 
# "Parameters: PROMIS (2.4%)",
# 
# "Criterion: Beta: 0.05 (0.9%)",
# "Criterion: Beta: 0.10 (0.4%)",
# 
# "Criterion: Cox-Snell: 0.02 (0.8%)",
# "Criterion: Cox-Snell: 0.03 (0.6%)",
# "Criterion: Cox-Snell: 0.05 (0.4%)",
# "Criterion: McFadden: 0.02 (0.5%)",
# "Criterion: McFadden: 0.03 (0.4%)",
# "Criterion: McFadden: 0.05 (0.1%)",
# "Criterion: Nagelkerke: 0.02 (0.4%)",
# "Criterion: Nagelkerke: 0.03 (0.3%)",
# "Criterion: Nagelkerke: 0.05 (0.1%)"#,
# 
  #"Criterion: LR: 1%",
  #"Criterion: LR: 5%",
  #"Criterion: LR Benjamini-Hochberg: 1%",
  #"Criterion: LR Benjamini-Hochberg: 5%",
  #"Criterion: LR Bonferroni: 1%",
  #"Criterion: LR Bonferroni: 5%"
))

# colors for flagged items
cols <- c("orange",
          "purple")

# vector needed for plotting
length_of_each_factor <- c(
  length(hf_3_criterion_vec),
  length(hf_2_parameter_vec) +  length(hf_3_criterion_vec) ,
  length(hf_1_correction_vec) +   length(hf_2_parameter_vec) +  length(hf_3_criterion_vec),
  length(wf_1_comparison_vec) +   length(hf_1_correction_vec) +   length(hf_2_parameter_vec) +  length(hf_3_criterion_vec))

create_tile_plot()

create_tile_plot <- function(specifications, pf_item) {
  
  x_rank <- rank(specifications[[pf_item]],  
                 ties.method = "random")
  
  ### Create all factors
  yvar <- rep(factor(rev(c(
    wf_1_comparison_vec,
    hf_1_correction_vec,
    hf_2_parameter_vec,
    hf_3_criterion_vec)), 
    levels = rev(c(
      wf_1_comparison_vec,
      hf_1_correction_vec,
      hf_2_parameter_vec,
      hf_3_criterion_vec))), 
    times = nrow(specifications))
  
  xvar <- rep(x_rank, 
              each = length(levels(yvar)))
  
  flagged <- rep(specifications[[pf_item]],
                 each = length(levels(yvar)))
  spec <- NULL
  
  for(i in 1:nrow(specifications)) {
    id <- as.numeric(levels(yvar) %in% 
                       as.character(unlist(
                         specifications[i, 1:number_which_how_factors])))  
    spec <- c(spec, id)
  }
  
  plotdata <- data.frame(xvar, 
                         yvar, 
                         spec,
                         flagged) %>% 
    mutate(fill = ifelse(spec == 1 & flagged == 1, 2, ifelse(spec == 1 & flagged == 0, 1, 0)))
  
  
  tile_plot <- ggplot(data = plotdata, 
                      aes(x = xvar, 
                          y = as.factor(yvar), 
                          fill = factor(fill))) +
    geom_raster() + 
    geom_hline(yintercept = length_of_each_factor + 0.5) +  # Change lines here here
    scale_x_continuous(position = "bottom") +
    scale_y_discrete(labels = ylabels) +
    scale_fill_manual(
      values = c("white", cols)) +
    labs(x = "Specification number", 
         y = "Which/How factors") +
    coord_cartesian(
      expand = F, 
      xlim = c(0.5, nrow(specifications) + 0.5)) +
    theme_bw() + 
    theme(legend.position = "none",
          axis.text.y = element_text(colour = "black", size = 8),
          axis.text.x = element_text(colour = "black"),
          axis.ticks = element_line(colour = "black"),
          plot.margin = margin(t = 5.5, 
                               r = 5.5, 
                               b = 5.5, 
                               l = 5.5, 
                               unit = "pt"))
  return(tile_plot)
}
create_tile_plot(specifications, "PFM46")

create_tile_plot(nolr_specifications, "PFM46")

create_flag_plot()

create_flag_plot <- function(specifications, pfm) {
  
  specifications$xvar <- rank(specifications[[pfm]],  
                              ties.method = "random")
  # Create a ggplot for the specified PFM item
  ggplot_spec <- ggplot(specifications, aes(x = xvar, 
                                            y = as.numeric(.data[[pfm]] == 1),
                                            fill = factor(.data[[pfm]]))) +
    geom_tile(aes(width = 1, height = 1)) +
    scale_fill_manual(values = cols) +
    labs(x = "", y = NULL, title = "") +
    guides(fill = FALSE) +
    scale_x_continuous(expand = c(0, 0)) +
    scale_y_continuous(expand = c(0, 0)) +
    theme_void()
  
  # Return the ggplot object
  return(ggplot_spec)
}

Combine Plots

# Loop over each PFM item
for (pfm in pf_items) {
  # Create the flagged plot
  flagged_plot <- create_flag_plot(nolr_specifications, pfm)
  
  # Create the tile plot
  tile_plot <- create_tile_plot(nolr_specifications, pfm)
  
  # Create the grid of plots
  plot_grid <- plot_grid(
    flagged_plot,
    tile_plot,
    labels = c(pfm, ""),
    ncol = 1,
    align = "v",
    rel_heights = c(0.5, 5)
  )
  
plot_grid
}

Figure: Descriptive Specification Plot

Only Flagged Items

Flagged Plot

# Create a list to hold the flagged plots for each PFM item
flagged_plots <- list()

# Loop over the PFM items and create a flagged plot for each one
for (pfm in pf_items) {
  flagged_plots[[pfm]] <- create_flag_plot(nolr_specifications, pfm)
}

# Combine all the flagged plots into a grid of plots in one column
flagged_grid <- plot_grid(plotlist = flagged_plots, 
                          labels = pf_items,
                          label_size = 8,
                          ncol = 1)

# Display the grid of plots
flagged_grid

Tile Plot

specifications_long <- specifications  %>% 
  pivot_longer(cols = PFM1:PFM9,
               names_to = "item") %>% filter(value == 1)

yvar <- rep(factor(rev(c(
  wf_1_comparison_vec,
  hf_1_correction_vec,
  hf_2_parameter_vec,
  hf_3_criterion_vec)), 
  levels = rev(c(
    wf_1_comparison_vec,
    hf_1_correction_vec,
    hf_2_parameter_vec,
    hf_3_criterion_vec))), 
  times = nrow(specifications_long))

#### Rank each summary effect size by magnitude

x_rank <- rank(specifications_long$value, 
               ties.method = "random")

flagged <- rep(specifications_long$value, 
               each = length(levels(yvar)))
spec <- NULL

#### Determine which specifications are observed and which are not

for(i in 1:nrow(specifications_long)) {
  id <- as.numeric(levels(yvar) %in% 
                     as.character(unlist(
                       specifications_long[i, 1:number_which_how_factors])))  
  spec <- c(spec, id)
}

xvar <- rep(x_rank, 
            each = length(levels(yvar)))

plotdata <- data.frame(xvar, 
                       yvar, 
                       spec,
                       flagged)


# Generate a color palette
num_colors <- length(unique(plotdata$xvar))
colors <- colorRampPalette(c("blue", "purple", "orange"))(num_colors)

# Map each unique xvar to a color
color_map <- setNames(colors, unique(plotdata$xvar))


plotdata <- plotdata %>% 
  mutate(fill = ifelse(spec == 1 & flagged == 1, 2, ifelse(spec == 1 & flagged == 0, 1, 0)))

tile_plot_all_items <- ggplot(data = plotdata, 
                              aes(x = xvar, 
                                  y = as.factor(yvar), 
                                  fill = factor(fill))) +
  geom_raster() + 
  geom_hline(yintercept = length_of_each_factor + 0.5) +  # Change lines here here
  scale_x_continuous(position = "bottom") +
  scale_y_discrete(labels = ylabels) +
  scale_fill_manual(values = c("white", "orange")) +
  labs(x = "Specification number", 
       y = "Which/How factors") +
  coord_cartesian(
    expand = F, 
    xlim = c(0.5, nrow(specifications_long) + 0.5)) +
  theme_bw() + 
  theme(legend.position = "none",
        axis.text.y = element_text(colour = "black", size = 8),
        axis.text.x = element_text(colour = "black"),
        axis.ticks = element_line(colour = "black"),
        plot.margin = margin(t = 5.5, 
                             r = 5.5, 
                             b = 5.5, 
                             l = 5.5, 
                             unit = "pt"))

tile_plot_all_items

All Items

specifications_long <- specifications  %>% 
  pivot_longer(cols = PFM1:PFM9,
               names_to = "item") 

yvar <- rep(factor(rev(c(
  wf_1_comparison_vec,
  hf_1_correction_vec,
  hf_2_parameter_vec,
  hf_3_criterion_vec)), 
  levels = rev(c(
    wf_1_comparison_vec,
    hf_1_correction_vec,
    hf_2_parameter_vec,
    hf_3_criterion_vec))), 
  times = nrow(specifications_long))

#### Rank each summary effect size by magnitude

x_rank <- rank(specifications_long$value, 
               ties.method = "random")

flagged <- rep(specifications_long$value, 
               each = length(levels(yvar)))
spec <- NULL

#### Determine which specifications are observed and which are not

for(i in 1:nrow(specifications_long)) {
  id <- as.numeric(levels(yvar) %in% 
                     as.character(unlist(
                       specifications_long[i, 1:number_which_how_factors])))  
  spec <- c(spec, id)
}

xvar <- rep(x_rank, 
            each = length(levels(yvar)))

plotdata <- data.frame(xvar, 
                       yvar, 
                       spec,
                       flagged)


plotdata <- plotdata %>% 
  mutate(fill = ifelse(spec == 1 & flagged == 1, 2, ifelse(spec == 1 & flagged == 0, 1, 0)))

Universe tile plot

tile_plot_all_items <- ggplot(data = plotdata, 
                              aes(x = xvar, 
                                  y = as.factor(yvar), 
                                  fill = factor(fill))) +
  geom_raster() + 
  geom_hline(yintercept = length_of_each_factor + 0.5) +  # Change lines here here
  scale_x_continuous(position = "bottom") +
  scale_y_discrete(labels = ylabels) +
  scale_fill_manual(
    values = c("white", cols)) +
  labs(x = "Specification number", 
       y = "Which/How factors") +
  coord_cartesian(
    expand = F, 
    xlim = c(0.5, nrow(specifications_long) + 0.5)) +
  theme_bw() + 
  theme(legend.position = "none",
        axis.text.y = element_text(colour = "black", size = 8),
        axis.text.x = element_text(colour = "black"),
        axis.ticks = element_line(colour = "black"),
        plot.margin = margin(t = 5.5, 
                             r = 5.5, 
                             b = 5.5, 
                             l = 5.5, 
                             unit = "pt"))
tile_plot_all_items

Tables

Overall Flagged Items

All Analyses

result_overall_all <- all_specifications %>%
  dplyr::summarise(across(contains("PF"), sum, .names = "{.col}")) %>%
  pivot_longer(cols = everything(), names_to = "variable", values_to = "sum") %>% 
  mutate(percent = round(sum/nrow(all_specifications)*100, 1)) %>% arrange(desc(percent))

print(result_overall_all) 
# A tibble: 35 Ɨ 3
   variable   sum percent
   <chr>    <dbl>   <dbl>
 1 PFM33      187    68.8
 2 PFM46      176    64.7
 3 PFM16      155    57  
 4 PFM15       99    36.4
 5 PFM51       92    33.8
 6 PFM43       88    32.4
 7 PFM53       87    32  
 8 PFM7        85    31.2
 9 PFM12       84    30.9
10 PFM26       84    30.9
# ℹ 25 more rows
# 5 most flagged items
print(result_overall_all) %>% slice(1:5) %>% pull(variable)
# A tibble: 35 Ɨ 3
   variable   sum percent
   <chr>    <dbl>   <dbl>
 1 PFM33      187    68.8
 2 PFM46      176    64.7
 3 PFM16      155    57  
 4 PFM15       99    36.4
 5 PFM51       92    33.8
 6 PFM43       88    32.4
 7 PFM53       87    32  
 8 PFM7        85    31.2
 9 PFM12       84    30.9
10 PFM26       84    30.9
# ℹ 25 more rows
[1] "PFM33" "PFM46" "PFM16" "PFM15" "PFM51"
# Percentage of flagged items
result_overall_all %>% 
  summarise(sum = sum(sum)) %>% 
  mutate(percent_flagged = sum / (nrow(all_specifications)*35) *100) 
# A tibble: 1 Ɨ 2
    sum percent_flagged
  <dbl>           <dbl>
1  2788            29.3

Only LR

result_overall_lr <- specifications_only_lr %>%
  dplyr::summarise(across(contains("PF"), sum, .names = "{.col}")) %>%
  pivot_longer(cols = everything(), names_to = "variable", values_to = "sum") %>% 
  mutate(percent = sum/nrow(specifications)*100) %>% 
  arrange(desc(percent))

print(result_overall_lr) 
# A tibble: 35 Ɨ 3
   variable   sum percent
   <chr>    <dbl>   <dbl>
 1 PFM15       96    35.3
 2 PFM16       96    35.3
 3 PFM33       96    35.3
 4 PFM43       87    32.0
 5 PFM53       87    32.0
 6 PFM7        85    31.2
 7 PFM27       83    30.5
 8 PFM21       81    29.8
 9 PFM26       81    29.8
10 PFM12       80    29.4
# ℹ 25 more rows
# 5 most flagged items
print(result_overall_lr) %>% slice(1:5) %>% pull(variable)
# A tibble: 35 Ɨ 3
   variable   sum percent
   <chr>    <dbl>   <dbl>
 1 PFM15       96    35.3
 2 PFM16       96    35.3
 3 PFM33       96    35.3
 4 PFM43       87    32.0
 5 PFM53       87    32.0
 6 PFM7        85    31.2
 7 PFM27       83    30.5
 8 PFM21       81    29.8
 9 PFM26       81    29.8
10 PFM12       80    29.4
# ℹ 25 more rows
[1] "PFM15" "PFM16" "PFM33" "PFM43" "PFM53"
# Percentage of flagged items
result_overall_lr %>% summarise(sum = sum(sum)) %>% 
  mutate(percent_flagged = sum / (nrow(specifications_only_lr)*35) *100)
# A tibble: 1 Ɨ 2
    sum percent_flagged
  <dbl>           <dbl>
1  2493            74.2

No LR

result_overall <- nolr_specifications %>%
  dplyr::summarise(across(contains("PF"), sum, .names = "{.col}")) %>%
  pivot_longer(cols = everything(), names_to = "variable", values_to = "sum") %>% 
  mutate(percent = round(sum/nrow(specifications)*100, 1)) %>% arrange(desc(percent))

print(result_overall) 
# A tibble: 35 Ɨ 3
   variable   sum percent
   <chr>    <dbl>   <dbl>
 1 PFM46      104    38.2
 2 PFM33       91    33.5
 3 PFM16       59    21.7
 4 PFM51       18     6.6
 5 PFM40        5     1.8
 6 PFM12        4     1.5
 7 PFM15        3     1.1
 8 PFM26        3     1.1
 9 PFM38        2     0.7
10 PFM44        2     0.7
# ℹ 25 more rows
# 5 most flagged items
print(result_overall) %>% slice(1:5) %>% pull(variable)
# A tibble: 35 Ɨ 3
   variable   sum percent
   <chr>    <dbl>   <dbl>
 1 PFM46      104    38.2
 2 PFM33       91    33.5
 3 PFM16       59    21.7
 4 PFM51       18     6.6
 5 PFM40        5     1.8
 6 PFM12        4     1.5
 7 PFM15        3     1.1
 8 PFM26        3     1.1
 9 PFM38        2     0.7
10 PFM44        2     0.7
# ℹ 25 more rows
[1] "PFM46" "PFM33" "PFM16" "PFM51" "PFM40"
# Percentage of flagged items
result_overall %>% 
  summarise(sum = sum(sum)) %>% 
  mutate(percent_flagged = sum / (nrow(nolr_specifications)*35) *100) 
# A tibble: 1 Ɨ 2
    sum percent_flagged
  <dbl>           <dbl>
1   295            4.79
data_items_ger <-  read_excel("data/tidy/items_ao2.xlsx", sheet = 3)

data_items_eng <- read_excel("data/tidy/items_ao2.xlsx", sheet = 1) %>% 
  filter(id %in% data_items_ger$id)

data_items_eng %>% left_join(result_overall, by = c("id" = "variable")) %>% drop_na(sum) %>% 
  dplyr::select(`PROMIS Item` = id,
                `Item Stem` = item,
                # `Response Categories` = response,
                `k` = sum,
                Percent = percent)
# A tibble: 35 Ɨ 4
   `PROMIS Item` `Item Stem`                                           k Percent
   <chr>         <chr>                                             <dbl>   <dbl>
 1 PFM1          Are you able to dig a 2-foot (1/2 m) deep hole i…     1     0.4
 2 PFM2          Are you able to lift a heavy painting or picture…     0     0  
 3 PFM3          Are you able to paint the walls of a room with a…     0     0  
 4 PFM4          Are you able to row a boat for 30 minutes withou…     0     0  
 5 PFM6          Are you able to hand wash and wax a car for 2 ho…     0     0  
 6 PFM7          Are you able to complete 5 push-ups without stop…     0     0  
 7 PFM9          Are you able to rake leaves or sweep for an hour…     0     0  
 8 PFM10         Are you able to do a pull-up?                         0     0  
 9 PFM12         Are you able to lift a heavy object (20 lbs/10 k…     4     1.5
10 PFM15         Are you able to hit the backboard with a basketb…     3     1.1
# ℹ 25 more rows

At least X flagged item(s)

nolr_specifications %>% filter(k_flagged>=1) %>% nrow() / nrow(nolr_specifications)*100
[1] 66.47727
nolr_specifications %>% filter(k_flagged>=2) %>% nrow() / nrow(nolr_specifications)*100
[1] 47.15909
nolr_specifications %>% filter(k_flagged>=3) %>% nrow() / nrow(nolr_specifications)*100
[1] 33.52273
nolr_specifications %>% filter(k_flagged>=4) %>% nrow() / nrow(nolr_specifications)*100
[1] 10.22727
nolr_specifications %>% filter(k_flagged>=5) %>% nrow() / nrow(nolr_specifications)*100
[1] 3.977273
nolr_specifications %>% filter(k_flagged>=6) %>% nrow() / nrow(nolr_specifications)*100
[1] 1.704545
nolr_specifications %>% filter(k_flagged>=7) %>% nrow() / nrow(nolr_specifications)*100
[1] 1.704545
nolr_specifications %>% filter(k_flagged>=8) %>% nrow() / nrow(nolr_specifications)*100
[1] 1.704545
nolr_specifications %>% filter(k_flagged>=9) %>% nrow() / nrow(nolr_specifications)*100
[1] 0.5681818
nolr_specifications %>% filter(k_flagged>=10) %>% nrow() / nrow(nolr_specifications)*100
[1] 0.5681818
nolr_specifications %>% arrange(desc(k_flagged)) 
# A tibble: 176 Ɨ 44
   wf_1_comparison hf_1_correction hf_2_parameter_vector hf_3_criterion
   <chr>           <chr>           <chr>                 <chr>         
 1 ger-arg         no              parameters_promis     beta: 0.05    
 2 usa-arg         no              parameters_multigroup beta: 0.05    
 3 usa-arg         no              parameters_promis     beta: 0.05    
 4 ger-arg         no              parameters_promis     CoxSnell: 0.02
 5 usa-ger-arg     no              parameters_multigroup beta: 0.05    
 6 ger-arg         no              parameters_multigroup beta: 0.05    
 7 usa-ger-arg     no              parameters_promis     beta: 0.05    
 8 usa-ger-arg     age             parameters_multigroup CoxSnell: 0.02
 9 ger-arg         age             parameters_multigroup CoxSnell: 0.02
10 usa-ger-arg     no              parameters_multigroup CoxSnell: 0.02
# ℹ 166 more rows
# ℹ 40 more variables: hf_4_threshold <dbl>, k_flagged <int>,
#   hf_2_parameter <chr>, V1 <dbl>, PFM1 <dbl>, PFM10 <dbl>, PFM12 <dbl>,
#   PFM15 <dbl>, PFM16 <dbl>, PFM17 <dbl>, PFM18 <dbl>, PFM19 <dbl>,
#   PFM2 <dbl>, PFM21 <dbl>, PFM23 <dbl>, PFM25 <dbl>, PFM26 <dbl>,
#   PFM27 <dbl>, PFM28 <dbl>, PFM29 <dbl>, PFM3 <dbl>, PFM32 <dbl>,
#   PFM33 <dbl>, PFM34 <dbl>, PFM35 <dbl>, PFM36 <dbl>, PFM37 <dbl>, …
  1. Item PFM46 was flagged for DIF in 38.2% of all analyses conducted.

  2. In 33.5% of the analyses performed, item PFM33 was flagged.

  3. 21.7% of all conducted analyses indicated a flag for item PFM16.

  4. The analysis flagged item PFM51 in 6.6% of the cases.

  5. Throughout all analyses, item PFM40 was flagged in 1.8% of them.


create_flagged_item_table()

Function that takes the specification data and creates summary tables of flagged items

create_flagged_item_level_table <- function(data, variable) {
  data %>%
    dplyr::group_by({{variable}}) %>%
    dplyr::summarise(across(contains("PF"), sum, .names = "{.col}")) %>%
    pivot_longer(cols = -{{variable}}, names_to = "variable", values_to = "sum") %>% 
    dplyr::mutate(percent = sum/nrow(data)*100) %>% 
    dplyr::arrange(desc(sum)) 
}

create_flagged_item_table <- function(data, variable) {
  data %>%
    dplyr::group_by({{variable}}) %>%
    dplyr::summarise(across(contains("PF"), sum, .names = "{.col}")) %>%
    pivot_longer(cols = -{{variable}}, names_to = "variable", values_to = "sum") %>% 
    dplyr::mutate(percent = sum/nrow(data)*100) %>% 
    dplyr::arrange(desc(sum)) %>% 
    dplyr::group_by({{variable}})%>% 
    dplyr::summarise(sum = sum(sum)) %>% 
    dplyr::mutate(percent_flagged = sum / (nrow(data)*35) *100)
}
Nagelkerke <- c("Nagelkerke: 0.05", "Nagelkerke: 0.03", "Nagelkerke: 0.02")
CoxSnell <- c("CoxSnell: 0.05", "CoxSnell: 0.03", "CoxSnell: 0.02")
McFadden <- c("McFadden: 0.05", "McFadden: 0.03", "McFadden: 0.02")
beta <- c("beta: 0.1", "beta: 0.05")
method <- c(Nagelkerke, CoxSnell, McFadden, beta)
correction <- c("no", "age")
parameters <- c("parameters_promis", "parameters_multigroup")
country <- c("ger-arg", "usa-arg", "usa-ger", "usa-ger-arg")

By wf_1_comparison

all_specifications %>%
    dplyr::group_by(wf_1_comparison) %>%
    dplyr::summarise(across(contains("PF"), sum, .names = "{.col}")) %>%
    pivot_longer(cols = -wf_1_comparison, names_to = "variable", values_to = "sum") %>% 
    dplyr::mutate(percent = sum/(nrow(all_specifications)/4)*100)
# A tibble: 140 Ɨ 4
   wf_1_comparison variable   sum percent
   <chr>           <chr>    <dbl>   <dbl>
 1 ger-arg         PFM1        20    29.4
 2 ger-arg         PFM10        8    11.8
 3 ger-arg         PFM12        8    11.8
 4 ger-arg         PFM15       24    35.3
 5 ger-arg         PFM16       60    88.2
 6 ger-arg         PFM17        8    11.8
 7 ger-arg         PFM18       24    35.3
 8 ger-arg         PFM19       24    35.3
 9 ger-arg         PFM2        11    16.2
10 ger-arg         PFM21        9    13.2
# ℹ 130 more rows

In all analyses for ger-arg, 27.85% of items were flagged for dif

all_specifications %>%
    dplyr::group_by(wf_1_comparison) %>%
    dplyr::summarise(across(contains("PF"), sum, .names = "{.col}")) %>%
    pivot_longer(cols = -wf_1_comparison, names_to = "variable", values_to = "sum") %>% 
    dplyr::mutate(percent = sum/nrow(all_specifications)*100) %>% 
    dplyr::arrange(wf_1_comparison)  %>% 
    dplyr::group_by(wf_1_comparison)%>% 
    dplyr::summarise(sum = sum(sum)) %>% 
    dplyr::mutate(percent_flagged = sum / (nrow(all_specifications)*35/4) *100)
# A tibble: 4 Ɨ 3
  wf_1_comparison   sum percent_flagged
  <chr>           <dbl>           <dbl>
1 ger-arg           666            28.0
2 usa-arg           646            27.1
3 usa-ger           579            24.3
4 usa-ger-arg       897            37.7

much better denominator: in all analyses for wf_1_comparison = ger-arg, PFM1 was flagged 30.8 percent of the time

nolr_specifications %>%
    dplyr::group_by(wf_1_comparison) %>%
    dplyr::summarise(across(contains("PF"), sum, .names = "{.col}")) %>%
    pivot_longer(cols = -wf_1_comparison, names_to = "variable", values_to = "sum") %>% 
    dplyr::mutate(percent = sum/(nrow(all_specifications)/4)*100)
# A tibble: 140 Ɨ 4
   wf_1_comparison variable   sum percent
   <chr>           <chr>    <dbl>   <dbl>
 1 ger-arg         PFM1         1    1.47
 2 ger-arg         PFM10        0    0   
 3 ger-arg         PFM12        0    0   
 4 ger-arg         PFM15        0    0   
 5 ger-arg         PFM16       36   52.9 
 6 ger-arg         PFM17        0    0   
 7 ger-arg         PFM18        0    0   
 8 ger-arg         PFM19        0    0   
 9 ger-arg         PFM2         0    0   
10 ger-arg         PFM21        0    0   
# ℹ 130 more rows

In all analyses for ger-arg, 27.85% of items were flagged for dif

nolr_specifications %>%
    dplyr::group_by(wf_1_comparison) %>%
    dplyr::summarise(across(contains("PF"), sum, .names = "{.col}")) %>%
    pivot_longer(cols = -wf_1_comparison, names_to = "variable", values_to = "sum") %>% 
    dplyr::mutate(percent = sum/nrow(all_specifications)*100) %>% 
    dplyr::arrange(wf_1_comparison)  %>% 
    dplyr::group_by(wf_1_comparison)%>% 
    dplyr::summarise(sum = sum(sum)) %>% 
    dplyr::mutate(percent_flagged = sum / (nrow(all_specifications)*35/4) *100)
# A tibble: 4 Ɨ 3
  wf_1_comparison   sum percent_flagged
  <chr>           <dbl>           <dbl>
1 ger-arg           138           5.80 
2 usa-arg            63           2.65 
3 usa-ger             5           0.210
4 usa-ger-arg        89           3.74 

All Specifications

result_wf1_all <- create_flagged_item_table(all_specifications, wf_1_comparison)
result_wf1_all
# A tibble: 4 Ɨ 3
  wf_1_comparison   sum percent_flagged
  <chr>           <dbl>           <dbl>
1 ger-arg           666            7.00
2 usa-arg           646            6.79
3 usa-ger           579            6.08
4 usa-ger-arg       897            9.42
create_flagged_item_level_table(all_specifications, wf_1_comparison)
# A tibble: 140 Ɨ 4
   wf_1_comparison variable   sum percent
   <chr>           <chr>    <dbl>   <dbl>
 1 ger-arg         PFM33       68    25  
 2 ger-arg         PFM46       64    23.5
 3 ger-arg         PFM16       60    22.1
 4 usa-arg         PFM46       60    22.1
 5 usa-ger-arg     PFM33       57    21.0
 6 usa-ger-arg     PFM46       52    19.1
 7 usa-ger-arg     PFM16       44    16.2
 8 usa-arg         PFM33       38    14.0
 9 ger-arg         PFM51       34    12.5
10 usa-ger         PFM51       28    10.3
# ℹ 130 more rows

Specifications without LR

result_wf1_nolr <- create_flagged_item_table(nolr_specifications, wf_1_comparison)
result_wf1_nolr
# A tibble: 4 Ɨ 3
  wf_1_comparison   sum percent_flagged
  <chr>           <dbl>           <dbl>
1 ger-arg           138          2.24  
2 usa-arg            63          1.02  
3 usa-ger             5          0.0812
4 usa-ger-arg        89          1.44  
create_flagged_item_level_table(nolr_specifications, wf_1_comparison)
# A tibble: 140 Ɨ 4
   wf_1_comparison variable   sum percent
   <chr>           <chr>    <dbl>   <dbl>
 1 ger-arg         PFM33       44   25   
 2 ger-arg         PFM46       40   22.7 
 3 ger-arg         PFM16       36   20.5 
 4 usa-arg         PFM46       36   20.5 
 5 usa-ger-arg     PFM33       33   18.8 
 6 usa-ger-arg     PFM46       28   15.9 
 7 usa-ger-arg     PFM16       20   11.4 
 8 usa-arg         PFM33       14    7.95
 9 ger-arg         PFM51       10    5.68
10 usa-ger         PFM51        4    2.27
# ℹ 130 more rows

Double check with alternative process

plotdata %>% 
  filter(yvar %in% country) %>% 
  group_by(yvar) %>% 
  dplyr::summarise(times_flagged = sum(fill ==2))  %>% 
  mutate(percent = times_flagged / (length(unique(plotdata$xvar)))*100)
# A tibble: 4 Ɨ 3
  yvar        times_flagged percent
  <fct>               <int>   <dbl>
1 ger-arg               666    7.00
2 usa-arg               646    6.79
3 usa-ger               579    6.08
4 usa-ger-arg           897    9.42

Text

result_wf1 <- result_wf1_all

For the comparison between ger-arg, 7% of all items were flagged for DIF.

Comparing usa-arg, we found that 6.8% of the total items were indicated for DIF.

When comparing usa-ger, 6.1% of all items were flagged for DIF.

Comparing usa-ger-arg, it was noted that 9.4% of the items were marked for DIF.


By hf_1_parameter

All Specs

result_hf_1_all <- create_flagged_item_table(all_specifications, hf_1_correction)
result_hf_1_all
# A tibble: 2 Ɨ 3
  hf_1_correction   sum percent_flagged
  <chr>           <dbl>           <dbl>
1 age              1344            14.1
2 no               1444            15.2

No LR Specs

result_hf_1_nolr <- create_flagged_item_table(nolr_specifications, hf_1_correction)
result_hf_1_nolr
# A tibble: 2 Ɨ 3
  hf_1_correction   sum percent_flagged
  <chr>           <dbl>           <dbl>
1 age               122            1.98
2 no                173            2.81

Double check with alternative process

plotdata %>% 
  filter(yvar %in% correction) %>% 
  group_by(yvar) %>% 
  dplyr::summarise(times_flagged = sum(fill ==2)) %>%
  mutate(percent = times_flagged / (length(unique(plotdata$xvar))) *100)
# A tibble: 2 Ɨ 3
  yvar  times_flagged percent
  <fct>         <int>   <dbl>
1 no             1444    15.2
2 age            1344    14.1

By hf_2_parameter

All Specs

result_hf_2_all <- create_flagged_item_table(all_specifications, hf_2_parameter_vector)
result_hf_2_all
# A tibble: 2 Ɨ 3
  hf_2_parameter_vector   sum percent_flagged
  <chr>                 <dbl>           <dbl>
1 parameters_multigroup  1400            14.7
2 parameters_promis      1388            14.6

No LR

result_hf_2_nolr <- create_flagged_item_table(nolr_specifications, hf_2_parameter_vector)
result_hf_2_nolr
# A tibble: 2 Ɨ 3
  hf_2_parameter_vector   sum percent_flagged
  <chr>                 <dbl>           <dbl>
1 parameters_multigroup   149            2.42
2 parameters_promis       146            2.37

Double check with alternative process

plotdata %>% filter(yvar %in% parameters) %>% group_by(yvar) %>% dplyr::summarise(times_flagged = sum(fill ==2))  %>% 
  mutate(percent = times_flagged / (length(unique(plotdata$xvar))) *100)
# A tibble: 2 Ɨ 3
  yvar                  times_flagged percent
  <fct>                         <int>   <dbl>
1 parameters_promis              1388    14.6
2 parameters_multigroup          1400    14.7

Text

result_hf_2 <- result_hf_2_nolr

When analyzing parameters_multigroup, it was observed that 2.4% of all items were flagged for DIF. When using parameters_promis, 2.4% of all items were detected to have DIF.

By hf_3_criterion

All Specs

result_hf_3_all <- create_flagged_item_table(all_specifications, hf_3_criterion) 

result_hf_3_percent_method_all <- result_hf_3_all %>% 
  mutate(method = case_when(
    str_detect(hf_3_criterion, "Cox") ~ "R2 Cox",
    str_detect(hf_3_criterion, "McFadden") ~ "R2 McFadden",
    str_detect(hf_3_criterion, "Nagelkerke") ~ "R2 Nagelkerke",
    str_detect(hf_3_criterion, "beta") ~ "beta",
    str_detect(hf_3_criterion, "lr") ~ "lr",
    TRUE ~ "other" # This will assign "other" to rows where none of the above conditions are TRUE. You can adjust as needed.
  )) %>% 
  dplyr::group_by(method) %>% 
  dplyr::summarise(sum = sum(sum)) %>% 
  dplyr::mutate(percent_flagged = sum / (nrow(all_specifications)*35) *100) # nrow(specifications) analyses by 35 potential item flags
result_hf_3_percent_method_all
# A tibble: 5 Ɨ 3
  method          sum percent_flagged
  <chr>         <dbl>           <dbl>
1 R2 Cox          104           1.09 
2 R2 McFadden      66           0.693
3 R2 Nagelkerke    46           0.483
4 beta             79           0.830
5 lr             2493          26.2  

No LR

result_hf_3_nolr <- create_flagged_item_table(nolr_specifications, hf_3_criterion) 

result_hf_3_percent_method_nolr <- result_hf_3_nolr %>% 
  mutate(method = case_when(
    str_detect(hf_3_criterion, "Cox") ~ "R2 Cox",
    str_detect(hf_3_criterion, "McFadden") ~ "R2 McFadden",
    str_detect(hf_3_criterion, "Nagelkerke") ~ "R2 Nagelkerke",
    str_detect(hf_3_criterion, "beta") ~ "beta",
    str_detect(hf_3_criterion, "lr") ~ "lr",
    TRUE ~ "other" # This will assign "other" to rows where none of the above conditions are TRUE. You can adjust as needed.
  )) %>% 
  dplyr::group_by(method) %>% 
  dplyr::summarise(sum = sum(sum)) %>% 
  dplyr::mutate(percent_flagged = sum / (nrow(nolr_specifications)*35) *100) # nrow(specifications) analyses by 35 potential item flags
result_hf_3_percent_method_nolr
# A tibble: 4 Ɨ 3
  method          sum percent_flagged
  <chr>         <dbl>           <dbl>
1 R2 Cox          104           1.69 
2 R2 McFadden      66           1.07 
3 R2 Nagelkerke    46           0.747
4 beta             79           1.28 

Table All Specs

colnames(result_wf1_all)[1] <- "id"
colnames(result_hf_1_all)[1] <- "id"
colnames(result_hf_2_all)[1] <- "id"
colnames(result_hf_3_percent_method_all)[1] <- "id"

table_all_specs <- bind_rows(
  result_wf1_all,
  result_hf_1_all,
  result_hf_2_all,
  result_hf_3_percent_method_all)

Table No LR Specs

colnames(result_wf1_nolr)[1] <- "id"
colnames(result_hf_1_nolr)[1] <- "id"
colnames(result_hf_2_nolr)[1] <- "id"
colnames(result_hf_3_percent_method_nolr)[1] <- "id"

table_nolr <- bind_rows(
  result_wf1_nolr,
  result_hf_1_nolr,
  result_hf_2_nolr,
  result_hf_3_percent_method_nolr)

full_join(table_all_specs, table_nolr, by = "id") %>% 
  mutate(percent_flagged.x = round(percent_flagged.x, 1),
         percent_flagged.y = round(percent_flagged.y, 1),
         ) %>% 
  dplyr::select(Specification = id,
         "Items Flagged" = sum.x,
         "Flagged All Methods (%)" = percent_flagged.x,
         "Flagged LR Excluded (%)" = percent_flagged.y) 
# A tibble: 13 Ɨ 4
   Specification   `Items Flagged` Flagged All Methods …¹ Flagged LR Excluded …²
   <chr>                     <dbl>                  <dbl>                  <dbl>
 1 ger-arg                     666                    7                      2.2
 2 usa-arg                     646                    6.8                    1  
 3 usa-ger                     579                    6.1                    0.1
 4 usa-ger-arg                 897                    9.4                    1.4
 5 age                        1344                   14.1                    2  
 6 no                         1444                   15.2                    2.8
 7 parameters_mul…            1400                   14.7                    2.4
 8 parameters_pro…            1388                   14.6                    2.4
 9 R2 Cox                      104                    1.1                    1.7
10 R2 McFadden                  66                    0.7                    1.1
11 R2 Nagelkerke                46                    0.5                    0.7
12 beta                         79                    0.8                    1.3
13 lr                         2493                   26.2                   NA  
# ℹ abbreviated names: ¹​`Flagged All Methods (%)`, ²​`Flagged LR Excluded (%)`

Figures: Amount of Flagged Items per DIF Analysis

Setup

data_flagged_items <- all_specifications %>%
  #filter(method != "lr") %>% 
  rowwise() %>% 
  dplyr::mutate(sum = sum(c_across(PFM1:PFM9))) %>%
  ungroup() %>%  
  mutate(
    wf_1_comparison = toupper(wf_1_comparison), # Convert to uppercase
    hf_1_correction = sapply(hf_1_correction, function(x) str_to_title(x)), # First letter uppercase for each word
    hf_2_parameter = case_when(
      hf_2_parameter == "parameters_promis" ~ "PROMIS",
      hf_2_parameter == "parameters_multigroup" ~ "Multigroup",
      TRUE ~ hf_2_parameter # Default case if there are other values
    ),
    hf_3_criterion = case_match(
      hf_3_criterion,
      "lr: 0.01"     ~ "LR: 0.01",
      "lr: 0.05"     ~ "LR: 0.05",
      "lr_bon: 0.01" ~ "LR Bonferroni: 0.01",
      "lr_bon: 0.05" ~ "LR Bonferroni: 0.05",
      "lr_ben: 0.01" ~ "LR BH: 0.01",
      "lr_ben: 0.05" ~ "LR BH: 0.05",
      .default =  hf_3_criterion # Default case if there are other values
    )
  )
data_flagged_items$hf_3_criterion = factor(data_flagged_items$hf_3_criterion, 
                                           levels = rev(c("CoxSnell: 0.02", 
                                                          "CoxSnell: 0.03", 
                                                          "CoxSnell: 0.05", 
                                                          "McFadden: 0.02", 
                                                          "McFadden: 0.03", 
                                                          "McFadden: 0.05",
                                                          "Nagelkerke: 0.02", 
                                                          "Nagelkerke: 0.03", 
                                                          "Nagelkerke: 0.05",
                                                          "Nagelkerke", 
                                                          "beta: 0.1",
                                                          "beta: 0.05",
                                                          "LR: 0.01",
                                                          "LR: 0.05",
                                                          "LR Bonferroni: 0.01",
                                                          "LR Bonferroni: 0.05",
                                                          "LR BH: 0.01",
                                                          "LR BH: 0.05")))

Histogram

hist_flagged_items <- data_flagged_items %>% 
  ggplot(aes(x = sum)) +
  geom_histogram(binwidth = 1, color = "black", fill = "#21918c") +
  theme_minimal() +
  labs(title = "Histogram: Amount of Flagged Items From Each DIF Analysis", 
       x = "Number of Flagged Items per Analysis", 
       y = "Frequency of Analyses")

hist_flagged_items

Distribution

p1 <- data_flagged_items %>%
  ggplot(aes(x = wf_1_comparison, y = sum, fill = wf_1_comparison)) +
  geom_boxplot(alpha = 0.5) +
    geom_jitter(alpha = 0.3) +
  scale_fill_viridis_d() +
  labs(title = "a) Specification: Country Comparison", x = "Country Comparison", y = "Number of Flagged Items") +
  theme_minimal() +
  theme(legend.position = "none") +
  coord_flip()

p2 <- data_flagged_items %>%
  ggplot(aes(x = hf_1_correction, y = sum, fill = hf_1_correction)) +
  geom_boxplot(alpha = 0.5) +
    geom_jitter(alpha = 0.3) +
  scale_fill_viridis_d() +
  labs(title = "b) Specification: Type of Correction", x = "Correction", y = "Number of Flagged Items") +
  theme_minimal() +
  theme(legend.position = "none") +
  coord_flip()

p3 <- data_flagged_items %>%
  ggplot(aes(x = hf_2_parameter, y = sum, fill = hf_2_parameter)) +
  geom_boxplot(alpha = 0.5) +
  geom_jitter(alpha = 0.3) +
  scale_fill_viridis_d() +
  labs(title = "c) Specification: Type of Item Parameters", x = "Item Parameters", y = "Number of Flagged Items") +
  theme_minimal() +
  theme(legend.position = "none") +
  coord_flip()

p4 <- data_flagged_items %>%
  ggplot(aes(x = hf_3_criterion, y = sum, fill = hf_3_criterion)) +
  geom_boxplot(alpha = 0.5) +
    geom_jitter(alpha = 0.3) +
  scale_fill_viridis_d() +
  labs(title = "d) Specification: DIF Flagging Criterion and Threshold", x = "Method", y = "Number of Flagged Items") +
  theme_minimal() +
  theme(legend.position = "none") +
  coord_flip()

combined_plot123 <- plot_grid(p1, p2, p3, ncol = 1, align = 'v')

combined_plot <- plot_grid(combined_plot123, p4, ncol = 2, align = 'v')

print(combined_plot)

data_flagged_items %>%
    filter(wf_1_comparison == "") %>% 
    ggplot(aes(x = wf_1_comparison, y = sum, fill = wf_1_comparison)) +
    geom_boxplot(alpha = 0.5) +
    scale_fill_viridis_d() +
    labs(title = "Specification: Country Comparison", x = "Country Comparison", y = "Number of Flagged Items") +
    theme_minimal() +
    theme(legend.position = "none") +
    coord_flip()

plot_filterd_multiverse <- function(variable, value) {
  
  p1 <- data_flagged_items %>%
    filter({{variable}} == value) %>% 
    ggplot(aes(x = wf_1_comparison, y = sum, fill = wf_1_comparison)) +
    geom_boxplot(alpha = 0.5) +
    scale_fill_viridis_d() +
    labs(title = "Specification: Country Comparison", x = "Country Comparison", y = "Number of Flagged Items") +
    theme_minimal() +
    theme(legend.position = "none") +
    coord_flip()
  
  p2 <- data_flagged_items %>%
    filter({{variable}} == value) %>% 
    ggplot(aes(x = hf_1_correction, y = sum, fill = hf_1_correction)) +
    geom_boxplot(alpha = 0.5) +
    scale_fill_viridis_d() +
    labs(title = "Specification: Type of Correction", x = "Correction", y = "Number of Flagged Items") +
    theme_minimal() +
    theme(legend.position = "none") +
    coord_flip()
  
  p3 <- data_flagged_items %>%
    filter({{variable}} == value) %>% 
    ggplot(aes(x = hf_2_parameter, y = sum, fill = hf_2_parameter)) +
    geom_boxplot(alpha = 0.5) +
    scale_fill_viridis_d() +
    labs(title = "Specification: Type of Item Parameters", x = "Item Parameters", y = "Number of Flagged Items") +
    theme_minimal() +
    theme(legend.position = "none") +
    coord_flip()
  
  p4 <- data_flagged_items %>%
    filter({{variable}} == value) %>% 
    ggplot(aes(x = hf_3_criterion, y = sum, fill = hf_3_criterion)) +
    geom_boxplot(alpha = 0.5) +
    scale_fill_viridis_d() +
    labs(title = "Specification: DIF Flagging Criterion and Threshold", x = "Method", y = "Number of Flagged Items") +
    theme_minimal() +
    theme(legend.position = "none") +
    coord_flip()
  
  combined_plot123 <- plot_grid(p1, p2, p3, ncol = 1, align = 'v')
  
  combined_plot <- plot_grid(combined_plot123, p4, ncol = 2, align = 'v')
  
  print(combined_plot)
  
}
plot_filterd_multiverse(variable = wf_1_comparison, value = "USA-GER-ARG")

plot_filterd_multiverse(wf_1_comparison, "GER-ARG")

plot_filterd_multiverse(hf_1_correction, "Age")